On cherche à étudier l’effet de trois facteurs sur le transcriptome des racines d’Arabidopsis. On cherche ici à utiliser et comparer différents packages de multiDE sur nos données.
data <- read.csv("quantifFiles/quantifGenes.csv", h = T, sep = ",")
rownames(data) <- data$Gene
genes = which(!(grepl("__", rownames(data))))
not_quant = data[which((grepl("__", rownames(data)))), ]
data = data[genes, grepl("R", colnames(data))]
head(data) R3 R2 R1 R5 R4 R6 R7 R11 R10 R12 R13 R17 R16 R14
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325 2113 2193 2564
AT1G01020 416 285 289 349 364 370 226 513 502 561 461 407 432 614
AT1G01030 31 15 19 29 36 28 12 47 34 47 18 40 32 44
R15 R18 R24 R19 R22 R23 R21 R20 R9 R8
AT1G01010 2364 2074 1987 2027 1754 1697 1537 1898 816 912
AT1G01020 380 502 484 467 426 415 413 462 223 312
AT1G01030 37 27 42 39 36 40 37 37 15 19
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
[1] 28775 24
annot <- read.csv("Code_for_RNAseq_CO2_N_Fr.csv", h = T, sep = ";")
conditions <- as.vector(unique(annot$Sample))
annot$ID <- paste0("R", annot$Code)
annot$condition <- substr(conditions, 1, nchar(conditions) - 1)
cond <- unique(substr(conditions, 1, nchar(conditions) - 1))
cond <- cond[grepl("At", cond)]
getCondition <- function(id) {
return(annot[annot$ID == id, "condition"])
}
getExactCondition <- function(id) {
return(annot[annot$ID == id, "Sample"])
}
getLabel <- function(id) {
text <- as.vector(annot[annot$ID == id, "Sample"])
res <- ""
nb <- substr(text, nchar(text), nchar(text))
if (grepl("Ambient", text)) {
res = paste0(res, "c")
} else {
res = paste0(res, "C")
}
if (grepl("High", text)) {
res = paste0(res, "N")
} else {
res = paste0(res, "n")
}
if (grepl("Starv", text)) {
res = paste0(res, "f")
} else {
res = paste0(res, "F")
}
res = paste0(res, "_", nb)
return(res)
}
getLabel("R6")[1] "cnF_3"
[1] At_AmbientCO2_LowNitrate_Fe1
48 Levels: At_AmbientCO2_HighNitrate_Fe1 ... Sl_ElevatedCO2_LowNitrate_FeStarvation3
TCC is an R package that provides a series of functions for differential expression analysis of tag count data. The package incorporates multi-step normalization methods, whose strategy is to remove potential DEGs before performing the data normalization. The normalization function based on this DEG elimination strategy (DEGES) includes (i) the original TbT method based on DEGES for two-group data with or without replicates, (ii) much faster methods for two-group data with or without replicates, and (iii) methods for multi-group comparison. TCC provides a simple unified interface to perform such analyses with combinations of functions provided by edgeR, DESeq, and baySeq.
On crée l’objet TCC avec le design souhaité, et on filtre les gènes avec de faibles expressions (paramètre low.count).
Lors de la normalisation (DEGES,iedgeR), on fait un premier calcul des gènes DE, pour pouvoir les enlever lors de la normalisation. Le maramètre test.method permet de choisir la manière de détecter les genes DE (edgeR, DEsqe2, ou tBt (très long)). On peut répéter cette procédure jusqu’à la convergence des facteurs de taille des librairies, d’ou le i.
# design
group <- sapply(colnames(data), getCondition)
colnames(data) <- sapply(colnames(data), getLabel)
tcc <- new("TCC", data, group)
tccCount:
cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2 CnF_1 CnF_3 cNf_1
AT1G01010 1526 1006 1116 1275 967 1018 854 1132 1294 1364 2325
AT1G01020 416 285 289 349 364 370 226 513 502 561 461
AT1G01030 31 15 19 29 36 28 12 47 34 47 18
cnf_2 cnf_1 cNf_2 cNf_3 cnf_3 Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2
AT1G01010 2113 2193 2564 2364 2074 1987 2027 1754 1697 1537 1898
AT1G01020 407 432 614 380 502 484 467 426 415 413 462
AT1G01030 40 32 44 37 27 42 39 36 40 37 37
CNF_3 CNF_2
AT1G01010 816 912
AT1G01020 223 312
AT1G01030 15 19
[ reached getOption("max.print") -- omitted 3 rows ]
Sample:
group norm.factors lib.sizes
cNF_3 At_AmbientCO2_HighNitrate_Fe 1 32033146
cNF_2 At_AmbientCO2_HighNitrate_Fe 1 25708565
cNF_1 At_AmbientCO2_HighNitrate_Fe 1 25389227
cnF_2 At_AmbientCO2_LowNitrate_Fe 1 29410954
cnF_1 At_AmbientCO2_LowNitrate_Fe 1 26720832
cnF_3 At_AmbientCO2_LowNitrate_Fe 1 26429803
CNF_1 At_ElevatedCO2_HighNitrate_Fe 1 16944343
CnF_2 At_ElevatedCO2_LowNitrate_Fe 1 28518532
CnF_1 At_ElevatedCO2_LowNitrate_Fe 1 30134923
CnF_3 At_ElevatedCO2_LowNitrate_Fe 1 30939471
cNf_1 At_AmbientCO2_HighNitrate_FeStarvation 1 30097577
cnf_2 At_AmbientCO2_LowNitrate_FeStarvation 1 29411503
cnf_1 At_AmbientCO2_LowNitrate_FeStarvation 1 32417735
cNf_2 At_AmbientCO2_HighNitrate_FeStarvation 1 34497723
cNf_3 At_AmbientCO2_HighNitrate_FeStarvation 1 31947553
cnf_3 At_AmbientCO2_LowNitrate_FeStarvation 1 30167133
Cnf_3 At_ElevatedCO2_LowNitrate_FeStarvation 1 33780481
CNf_1 At_ElevatedCO2_HighNitrate_FeStarvation 1 30427781
Cnf_1 At_ElevatedCO2_LowNitrate_FeStarvation 1 29577460
Cnf_2 At_ElevatedCO2_LowNitrate_FeStarvation 1 30405729
CNf_3 At_ElevatedCO2_HighNitrate_FeStarvation 1 25049585
CNf_2 At_ElevatedCO2_HighNitrate_FeStarvation 1 28496828
CNF_3 At_ElevatedCO2_HighNitrate_Fe 1 18823797
CNF_2 At_ElevatedCO2_HighNitrate_Fe 1 20700940
[1] 23835 24
# Normalisation
tcc <- calcNormFactors(tcc, norm.method = "tmm", test.method = "edger", iteration = 3,
FDR = 0.1, floorPDEG = 0.05)
tcc$norm.factors cNF_3 cNF_2 cNF_1 cnF_2 cnF_1 cnF_3 CNF_1 CnF_2
0.9649419 0.9822623 0.9010228 1.0041134 1.0505083 1.0240258 0.9705235 1.0626335
CnF_1 CnF_3 cNf_1 cnf_2 cnf_1 cNf_2 cNf_3 cnf_3
1.0318007 1.0534152 1.0041470 0.9934825 1.0110141 1.0410751 0.9941804 0.9883754
Cnf_3 CNf_1 Cnf_1 Cnf_2 CNf_3 CNf_2 CNF_3 CNF_2
0.9305956 1.0027314 0.9983091 1.0067578 1.0615281 1.0271982 0.9056064 0.9897515
user system elapsed
30.302 1.986 32.414
s <- sample(rownames(tcc$count), size = 200)
heatmap(as.matrix(tcc$count[s, ]), main = "Before normalisation")normalized.count <- getNormalizedData(tcc)
heatmap(as.matrix(normalized.count[s, ]), main = "After normalisation")# DEtest
tcc <- estimateDE(tcc, test.method = "edger", FDR = 0.01)
result <- getResult(tcc, sort = TRUE)
DEgenes <- subset(result, estimatedDEG == 1)
print(paste(dim(DEgenes)[1], " genes DE"))[1] "15710 genes DE"
gene_id a.value m.value p.value q.value rank estimatedDEG
1 AT1G03870 NA NA 0 0 44.5 1
2 AT1G05660 NA NA 0 0 44.5 1
3 AT1G06120 NA NA 0 0 44.5 1
4 AT1G09932 NA NA 0 0 44.5 1
5 AT1G12030 NA NA 0 0 44.5 1
6 AT1G12080 NA NA 0 0 44.5 1
plot(tcc, group = c("At_AmbientCO2_HighNitrate_Fe", "At_ElevatedCO2_HighNitrate_Fe"),
main = "Effet CO2 en conditions normales", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_HighNitrate_FeStarvation", "At_ElevatedCO2_HighNitrate_FeStarvation"),
main = "Effet CO2 en Fe starvation", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_LowNitrate_Fe", "At_ElevatedCO2_LowNitrate_Fe"),
main = "Effet CO2 en low Nitrate", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_HighNitrate_Fe", "At_AmbientCO2_HighNitrate_FeStarvation"),
main = "Effet Fe en conditions normales", ylim = c(-11, 11))plot(tcc, group = c("At_AmbientCO2_LowNitrate_FeStarvation", "At_AmbientCO2_HighNitrate_FeStarvation"),
main = "Effet Nitrate en Fe starvation", ylim = c(-11, 11))library(reshape2)
melted_res <- melt(mat)
# ggplot(melted_res, aes(Var2, Var1, fill= log(value))) + geom_tile() +
# scale_fill_distiller(palette = 'RdPu') + theme(axis.text.x = element_text(size
# = 0.5, angle = 320, hjust = 0, colour = 'grey50')) library(viridis) heatmap <-
# ggplot(melted_res, aes(Var2, Var1, fill= log(value))) + geom_raster() +
# scale_fill_viridis(trans='sqrt') + theme(axis.text.x=element_text(angle=65,
# hjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank()) heatmap